#############################################################################
# Figure4.R
# Aim: to replicate Figure 4 in Atsusaka and Stevenson (2021)
# Run time: about 1 seconds
#############################################################################

#############################################################################
# (1) WRITE A FUNCTION FOR SENSITIVTY ANALYSIS
#############################################################################

library(tidyverse)
library(scales)
rm(list=ls())
start <- Sys.time()

cmBound = function(p, est, n, dq, n.dq, ...){
  
  N=n
  N.dq=n.dq

  pi.hat.naive = est
  pi.hat.naive.var = (pi.hat.naive*(1-pi.hat.naive))/(N-1) +
    (p*(1-p))/((N-1)*((2*p-1)^2))
  pi.hat.naive.sd = sqrt(pi.hat.naive.var)

  lambda.hat = (est*p)+(1-est)*(1-p) # Naive Lambda Hat
  gamma.hat = rev(seq(from=0.5, to=1, by=0.01)) # Vector of attentitive rate
  Bias.hat = (1/2)*((lambda.hat-0.5)/(p-0.5)) - (1/(2*gamma.hat))*((lambda.hat-0.5)/(p-0.5))
  
  pi.hat.bc = rep(pi.hat.naive, length(gamma.hat)) - Bias.hat # Bias Correction
  pi.hat.bc = ifelse(pi.hat.bc < 1, pi.hat.bc, 1) # Logical bound
  pi.hat.bc = ifelse(pi.hat.bc > 0, pi.hat.bc, 0) # Logical bound
  pi.hat.bc.var = (1/(gamma.hat^2))*
    ((pi.hat.bc*(1-pi.hat.bc))/(N-1) +
       (p*(1-p))/((N-1)*((2*p-1)^2)))
  pi.hat.bc.sd = sqrt(pi.hat.bc.var)
  
  mean = pi.hat.bc
  low  = pi.hat.bc-1.96*pi.hat.bc.sd; low  = ifelse(low > 0, low, 0)
  high = pi.hat.bc+1.96*pi.hat.bc.sd; high = ifelse(high < 1, high, 1)
  
  dq.var = dq*(1-dq)/(N.dq-1)
  dq.sd = sqrt(dq.var)
  dq.upper = min(dq+1.96*dq.sd,1)
  dq.lower = max(dq-1.96*dq.sd,0)
    
  perinat = (1-gamma.hat)*100  
  ggdata <- as.data.frame(cbind(mean, low, high, perinat, dq))
  
  x <- c(perinat[1], perinat, rev(perinat[-1])) # Left-bottom, left-top, right-top, right-bottom
  y <- c(low[1],high,rev(low[-1]))
  
  
 plot(perinat, mean, type="n", ylim=c(0,0.5), las=1, ylab="", xlab="", ...)
      polygon(x,y,col=gray(0.9), border=NA)
 #     abline(h=dq, col="gray50", lty=2, lwd=2)
      points(x=0, y=dq[1], pch=16, cex=1.5)
      arrows(x0=0, x1=0, y0=dq.lower[1], y1=dq.upper, lwd=1.8, length=0)      
      abline(h=dq.upper, col="gray50", lty=2)
      abline(h=dq.lower, col="gray50", lty=2)
      lines(perinat, mean, lwd=1.8, col="firebrick4")
      points(x=0, y=mean[1], pch=0, cex=1.5, col="firebrick4")
box()


}


#############################################################################
# (2) APPLY SENSITIVTY ANALYSIS TO SELECTED STUDIES
#############################################################################

dt <- read_csv("Existing_Works.csv") %>%
      mutate(est = CrosswiseEstimate,
             p = NonsensitiveProbability,
             dq = DirectEstimate) %>%
      dplyr::select(Application, est, p, dq, Nc, Nd) %>% 
      filter(est>=0.05 & est<0.5 & est >dq & Nc>200) %>%
      drop_na() %>%
      arrange(Application)

app.vec <- dt$Application
est.vec <- dt$est
p.vec <- dt$p
dq.vec <- dt$dq
Nc.vec <- dt$Nc
Nd.vec <- dt$Nd


myrow <- length(app.vec)/5
myrow <- round(myrow, digit=0)

{pdf("Figure4.pdf", width=9, height=6)
par(mfrow=c(myrow,5), mar=c(2,2,0.5,0.5), oma=c(2.2,2.2,1,0.5)+0.02, mgp=c(3,0.6,0))
for(i in 1:length(app.vec)){
                     cmBound(p=p.vec[i],
                         est=est.vec[i],
                         n=Nc.vec[i],
                         dq=dq.vec[i],
                         n.dq=Nd.vec[i])
  
pos <- ifelse(app.vec[i] %in% c("prejudice", "refugee", "shoplifting", "non-voting", "infidelity"), 
              0.1, 0.4) # Dodging Title
text(app.vec[i], x=20, y=pos, font=2, cex=1.5)
if(i==1){
  text(x=25, 0.25, labels="Reported estimate", col="firebrick4")
  arrows(x0=8,x1=2,y0=0.22,y1=est.vec[i]+0.02,length=0.05, col="firebrick4") 
}else if(i==6)
{text(x=17, 0.03, labels="Direct questioning")
 arrows(x0=5, x1=0, y0=0.06, y1=0.15, length=0.05)
  }


}

mtext("Prevalence of Sensitive Attributes", side=2, line=0.6, outer=T)
mtext("Percentage of Inattentive Respondents", side=1, line=0.6, outer=T)

dev.off()
}




#############################################################################
# (2) APPLY SENSITIVTY ANALYSIS TO ALL STUDIES
#############################################################################

dt <- read_csv("Existing_Works.csv") %>%
      filter(is.na(OnlyMention))

length(dt$Title)
length(unique(dt$Title))



dt <- read_csv("Existing_Works.csv") %>%
      mutate(est = CrosswiseEstimate,
             est = ifelse(est<0.5, est, 1-est),   # FLIP THE ESTIMATE FOR CONVENIENCE
             p = NonsensitiveProbability,
             dq = DirectEstimate) %>%
      dplyr::select(Application, est, p, dq, Nc, Nd, Title, Author) %>% 
      drop_na() %>%
      arrange(Application)

# View(dt) # Check the spread sheet if necessary

length(unique(dt$Title))

app.vec <- dt$Application
est.vec <- dt$est
p.vec <- dt$p
dq.vec <- dt$dq
Nc.vec <- dt$Nc
Nd.vec <- dt$Nd


myrow <- length(app.vec)/5
myrow <- round(myrow, digit=0)

pdf("FigureC1.pdf", width=9, height=12)
par(mfrow=c(myrow,5), mar=c(2,2,0.5,0.5), oma=c(2.2,2.2,1,0.5)+0.02, mgp=c(3,0.6,0))
for(i in 1:length(app.vec)){
                     cmBound(p=p.vec[i],
                         est=est.vec[i],
                         n=Nc.vec[i],
                         dq=dq.vec[i],
                         n.dq=Nd.vec[i])
  
pos <- ifelse(app.vec[i] %in% c("prejudice", "refugee", "shoplifting", "non-voting"), 
              0.1, 0.4) # Dodging Title
text(app.vec[i], x=20, y=pos, font=2, cex=1.5)
if(i==1){
  text(x=25, 0.25, labels="Reported estimate", col="firebrick4")
  arrows(x0=8,x1=2,y0=0.22,y1=est.vec[i]+0.02,length=0.05, col="firebrick4") 
}else if(i==6)
  {text(x=17, 0.03, labels="Direct questioning")}
}

mtext("Prevalence of Sensitive Attributes", side=2, line=0.6, outer=T)
mtext("Percentage of Inattentive Respondents", side=1, line=0.6, outer=T)

dev.off()

end <- Sys.time()
start - end

#############################################################################
# END OF THIS R SOURCE FILE
#############################################################################